Introduction

Reads were first de-multiplexed with ea-utils (fastq-multx) with an allowed barcode mismatch of 1 (Default). De-multiplexed reads were then subjected to an alignment against the UniVec vector contamination database using the adapter removal tool within ea-utils (fastq-mcf). Barcodes were extracted by remove the last 12 nucleotides from each read.

Next reads were subjected to an alignment against the Oligo library using a DNA aligner. We used Bowtie2…

Data import & preprocessing

We are going to first import the merged counts tables that is outputted from the Nextflow pipeline. This table already has the raw counts summarised from the alignments and merged into a single matrix.

# Import counts
cd8.counts = readRDS('../data/CD8-counts.rda') %>% 
  rownames_to_column("Id") %>% 
  mutate(Id = recode(Id, oligo_1899 = "MART1_ELA_a",
                         oligo_4281 = "MART1_ELA_b",
                         oligo_913 = "MART1_a",
                         oligo_3295 = "MART1_b",
                         oligo_1887 = "CDK4mut_a",
                         oligo_4269 = "CDK4mut_b",
                         oligo_1888 = "CDK4wt_a",
                         oligo_4270 = "CDK4wt_b",
                         oligo_1889 = "RPS3Amut_a",
                         oligo_4271 = "RPS3Amut_b",
                         oligo_1890 = "RPS3Awt_a",
                         oligo_4272 = "RPS3Awt_b",
                         oligo_1895 = "MANSC1mut_a",
                         oligo_4277 = "MANSC1mut_b",
                         oligo_1896 = "MANSC1wt_a",
                         oligo_4278 = "MANSC1wt_b")) %>% 
  column_to_rownames("Id")

# Convert to long format
cd8.counts.long = cd8.counts %>% 
  rownames_to_column("Id") %>% 
  pivot_longer(cols = -c('Id'), names_to = 'sample', values_to = 'counts')

# Import CD4 metadata
cd8.metadata = readRDS('../data/CD8-metadata.rda')

# Import MultiQC results
cd8.report = readRDS('../data/CD8-multiqc.rda')

# Import oligo annotations
oligo.genes = readRDS('../data/oligo-genes.rda')

Summary stats

# Mean
mean(cd8.report$raw_total_sequences)
## [1] 14162045
# Median
median(cd8.report$raw_total_sequences)
## [1] 13417612
# Range
range(cd8.report$raw_total_sequences)
## [1]  8434046 23626015

Mapped reads per sample

cd8.report %>% 
  select(Sample, reads_unmapped, reads_mapped) %>% 
  mutate(Sample = str_replace(Sample, '-bamstats', '')) %>% 
  pivot_longer(cols = c("reads_unmapped", "reads_mapped"), names_to = "feature", values_to = "reads") %>% 
  ggplot(aes(x = Sample, y = reads, fill = feature)) +
  geom_col() +
  coord_flip() +
  theme_light(base_size = 11) +
  theme(legend.position = "top") +
  scale_fill_aaas() +
  xlab("") +
  ylab("Number of reads")

cd8.report %>% 
  select(Sample, reads_unmapped_percent, reads_mapped_percent) %>% 
  mutate(Sample = str_replace(Sample, '-bamstats', '')) %>% 
  pivot_longer(cols = c("reads_unmapped_percent", "reads_mapped_percent"), names_to = "feature", values_to = "reads") %>% 
  mutate(reads = reads / 100) %>% 
  ggplot(aes(x = Sample, y = reads, fill = feature)) +
  geom_col() +
  coord_flip() +
  theme_light(base_size = 11) +
  theme(legend.position = "top") +
  scale_fill_aaas() +
  scale_y_continuous(labels = scales::percent) +
  xlab("") +
  ylab("Percent of reads")

Mock replicates

cd8.counts %>% 
  ggplot(., aes(x = log10(mock_a), y = log10(mock_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("Mock replicate #1 (log10)") +
  ylab("Mock replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

1D3 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(id3_a), y = log10(id3_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1D3 replicate #1 (log10)") +
  ylab("1D3 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

CDK4-53 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(cdk453_a), y = log10(cdk453_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("CDK4/53 replicate #1 (log10)") +
  ylab("CDK4/53 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:10 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(d1_10_a), y = log10(d1_10_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:10 replicate #1 (log10)") +
  ylab("1:10 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..))

Dilution 1:100 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(d1_100_a), y = log10(d1_100_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:100 replicate #1 (log10)") +
  ylab("1:100 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:300 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(d1_300_a), y = log10(d1_300_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:300 replicate #1 (log10)") +
  ylab("1:300 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:1000 replicates

cd8.counts %>% 
  ggplot(aes(x = log10(d1_1000_a), y = log10(d1_1000_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:1000 replicate #1 (log10)") +
  ylab("1:1000 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Calculate oligo-rep variance

# Calculate the variance between oligo-replicates
oligo.variance = cd8.counts %>% 
  rownames_to_column("Id") %>% 
  mutate_if(is.numeric, funs(log10)) %>% 
  left_join(oligo.genes, by = "Id") %>% 
  group_by(oligoGroup) %>% 
  summarise(mock_a = diff(mock_a),
            mock_b = diff(mock_b),
            cdk453_a = diff(cdk453_a),
            cdk453_b = diff(cdk453_b),
            id3_a = diff(id3_a),
            id3_b = diff(id3_b),
            d1_10_a = diff(d1_10_a),
            d1_10_b = diff(d1_10_b),
            d1_100_a = diff(d1_100_a),
            d1_100_b = diff(d1_100_b),
            d1_300_a = diff(d1_300_a),
            d1_300_b = diff(d1_300_b),
            d1_1000_a = diff(d1_1000_a),
            d1_1000_b = diff(d1_1000_b))
## `summarise()` has grouped output by 'oligoGroup'. You can override using the `.groups` argument.
# Plot histogram of variance across samples
oligo.variance %>% 
  pivot_longer(-oligoGroup, names_to = "Sample", values_to = "difference") %>% 
  ggplot(aes(x = difference)) +
  geom_histogram(bins = 100) +
  theme_bw() +
  facet_wrap(.~Sample) +
  xlab("Difference (log10)")

Median-normalization

Prepare data for DESeq2 object

# Match the sample ID's between samples
cd8.counts = cd8.counts[,match(cd8.metadata$Sample, colnames(cd8.counts))]

# Make DEseq2 object
ddsMat.cd8 <- DESeqDataSetFromMatrix(countData = cd8.counts,
                                     colData = cd8.metadata,
                                     design = ~Group)
## converting counts to integer mode

Normalize

# Get normalize counts
ddsMat.cd8 <- estimateSizeFactors(ddsMat.cd8)
ddsMat.cd8 <- estimateDispersions(ddsMat.cd8, fitType='local')
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# Get normalized counts 
cd8.norm <- counts(ddsMat.cd8, normalized = TRUE) %>% 
  as.data.frame() %>% 
  rownames_to_column("Id")

# Write tables to disk
write_tsv(cd8.counts, "tables/cd8-counts.tsv")
write_tsv(cd8.norm, "tables/cd8-counts-norm.tsv")

# Create a long table 
cd8.norm.long = cd8.norm %>% 
  gather(sample, norm.counts, -Id) 

Plot boxplots

# Pre-normalized
p1 = cd8.counts.long %>% 
  ggplot(aes(x = sample, y = log10(counts + 1))) +
  geom_boxplot() +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("") +
  ylab("Counts (log10)") +
  ggtitle('Un-normalized data')

# After normalized
p2 = cd8.norm.long %>% 
  ggplot(aes(x = sample, y = log10(norm.counts + 1))) +
  geom_boxplot() +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("") +
  ylab("Normalized counts (log10)") +
  ggtitle('Median-normalized data')

# Plot side-by side
p1 + p2

Plot normalized histogram

cd8.norm.long %>% 
  ggplot(aes(x = log10(norm.counts))) +
  geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  facet_wrap(sample~.) +
  theme_light(base_size = 11) +
  ylab("Density") +
  xlab("Normalized reads per oligo (log10)") 

MART1/1D3 dropout

# Get average between the replicates
id3.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         ID3_mean = rowMeans(select(., id3_a, id3_b))) %>% 
  mutate(M = log2(ID3_mean / Mock_mean),
         A = (log2(ID3_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
p1 = id3.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(ID3_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("1D3 normalized counts (log10)")

# Plot MA plot
p2 = id3.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " 1D3/Mock)"))) 

# Plot side-by side
p1 + p2

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "id3_a", "id3_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (ID3 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 5, 0.1%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
id3.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
id3.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05, abs(log2FoldChange) > 1)
##            Id baseMean log2FoldChange     lfcSE      stat        pvalue
## 1 MART1_ELA_a 1616.221      -2.592509 0.1668882 -15.53441  2.029294e-54
## 2     MART1_b 2359.897      -1.175441 0.1121242 -10.48339  1.029893e-25
## 3 MART1_ELA_b 1750.376      -4.113238 0.1543848 -26.64277 2.170495e-156
##            padj
## 1  4.831750e-51
## 2  1.634784e-22
## 3 1.033590e-152
# Show quick plot of significance ()
DESeq2::plotMA(id3.mock.res, ylim = c(-3, 3))

CDK4/53 dropout

# Get average between the replicates
cdk4.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b))) %>% 
  mutate(M = log2(CDK4_mean / Mock_mean),
         A = (log2(CDK4_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
p1 = cdk4.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(CDK4_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("CDK4 normalized counts (log10)")

# Plot MA plot
p2 = cdk4.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " CDK4/Mock)"))) 

p1 + p2

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "cdk453_a", "cdk453_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (CDK4 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4759 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 3, 0.063%
## LFC < 0 (down)     : 4, 0.084%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
cdk4.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
cdk4.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
##          Id baseMean log2FoldChange     lfcSE      stat       pvalue
## 1 CDK4mut_a 772.9214      -2.890157 0.2133074 -13.54925 8.003909e-42
## 2 CDK4mut_b 620.7388      -4.422590 0.3231989 -13.68380 1.268798e-42
##           padj
## 1 1.904530e-38
## 2 6.038211e-39
# Show quick plot of significance ()
DESeq2::plotMA(cdk4.mock.res, ylim = c(-5,5))

MART1/CDK4 dropout

Oligo Id’s for the positive control dropout’s are: - oligo_1899 (MART1_ELA) - oligo_4281 (MART1_ELA) - oligo_912 (MART1) - oligo_3294 (MART1)

# Get average between the replicates
id3.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b)),
         ID3_mean = rowMeans(select(., id3_a, id3_b))) %>% 
  mutate(M = log2(ID3_mean / CDK4_mean),
         A = (log2(ID3_mean) + log2(CDK4_mean)) / 2) 

# Plot scatterplot
p1 = id3.mean %>% 
  ggplot(aes(x = log10(CDK4_mean), y = log10(ID3_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("CDK4 normalized counts (log10)") +
  ylab("1D3 normalized counts (log10)") 

# Plot MA plot
p2 = id3.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " 1D3/CDK4)"))) 

p1 + p2

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("cdk453_a", "cdk453_b", "id3_a", "id3_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "CDK4")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (ID3 / CDK4)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4750 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 4, 0.084%
## LFC < 0 (down)     : 5, 0.11%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
id3.cdk4.res = results(ddsMat.subset, alpha = 0.05)

# View table
id3.cdk4.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
##            Id  baseMean log2FoldChange     lfcSE      stat        pvalue
## 1   CDK4mut_a  804.7891       2.955708 0.1968698  15.01352  5.988381e-51
## 2 MART1_ELA_a 1499.7890      -2.466632 0.1630949 -15.12390  1.126636e-51
## 3     MART1_b 2439.4898      -1.244110 0.1100652 -11.30339  1.262446e-29
## 4   CDK4mut_b  730.6316       4.667656 0.2748360  16.98342  1.089508e-64
## 5 MART1_ELA_b 1793.0922      -4.150197 0.1537456 -26.99393 1.741353e-160
##            padj
## 1  7.111203e-48
## 2  1.783841e-48
## 3  1.199324e-26
## 4  2.587582e-61
## 5 8.271426e-157
# Show quick plot of significance ()
DESeq2::plotMA(id3.cdk4.res)

Dilutions 1:10 dropout

# Get average between the replicates
d10.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D10_mean = rowMeans(select(., d1_10_a, d1_10_b))) %>% 
  mutate(M = log2(D10_mean / Mock_mean),
         A = (log2(D10_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
d10.scatter = d10.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(D10_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("D10 normalized counts (log10)")

# Plot MA plot
d10.ma = d10.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 5) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " D10/Mock)")))

d10.scatter + d10.ma

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_10_a", "d1_10_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (D10 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4759 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 7, 0.15%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d10.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
d10.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
##            Id  baseMean log2FoldChange     lfcSE      stat        pvalue
## 1   CDK4mut_a  800.8005      -2.507273 0.2217680 -11.30584  1.227801e-29
## 2 MART1_ELA_a 1621.4895      -2.555543 0.1588604 -16.08672  3.161408e-58
## 3   CDK4mut_b  624.8460      -4.227006 0.2828515 -14.94426  1.697680e-50
## 4 MART1_ELA_b 1735.4737      -4.350605 0.1653703 -26.30826 1.542788e-152
##            padj
## 1  1.460776e-26
## 2  7.522572e-55
## 3  2.693086e-47
## 4 7.342126e-149
# Show quick plot of significance ()
DESeq2::plotMA(d10.mock.res)

Dilutions 1:100 dropout

# Get average between the replicates
d100.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D100_mean = rowMeans(select(., d1_100_a, d1_100_b))) %>% 
  mutate(M = log2(D100_mean / Mock_mean),
         A = (log2(D100_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
d100.scatter = d100.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(D100_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("D100 normalized counts (log10)")

# Plot MA plot
d100.ma = d100.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 5) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " D100/Mock)")))

d100.scatter + d100.ma

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_100_a", "d1_100_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
# Get results (D100 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4763 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 9, 0.19%
## LFC < 0 (down)     : 13, 0.27%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d100.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
d100.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
##            Id  baseMean log2FoldChange     lfcSE       stat        pvalue
## 1   CDK4mut_a  837.8370      -2.121543 0.1884076 -11.260387  2.058525e-29
## 2 MART1_ELA_a 1685.6399      -2.207934 0.1242659 -17.767816  1.254810e-70
## 3   CDK4mut_b  716.7704      -2.263236 0.5349687  -4.230595  2.330739e-05
## 4 MART1_ELA_b 1916.8408      -2.656286 0.1128993 -23.527917 2.113133e-122
##            padj
## 1  3.268252e-26
## 2  2.988329e-67
## 3  1.264367e-02
## 4 1.006485e-118
# Show quick plot of significance ()
DESeq2::plotMA(d100.mock.res)

Dilutions 1:300 dropout

# Get average between the replicates
d300.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D300_mean = rowMeans(select(., d1_300_a, d1_300_b))) %>% 
  mutate(M = log2(D300_mean / Mock_mean),
         A = (log2(D300_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
d300.scatter = d300.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(D300_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("D300 normalized counts (log10)")

# Plot MA plot
d300.ma = d300.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 5) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " D300/Mock)")))

d300.scatter + d300.ma

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_300_a", "d1_300_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
# Get results (D300 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 8, 0.17%
## LFC < 0 (down)     : 7, 0.15%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d300.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
d300.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
##            Id baseMean log2FoldChange     lfcSE       stat       pvalue
## 1 MART1_ELA_a 2044.892      -1.071998 0.1144178  -9.369152 7.311744e-21
## 2 MART1_ELA_b 2355.140      -1.239435 0.1006218 -12.317753 7.269179e-35
##           padj
## 1 1.740926e-17
## 2 3.461583e-31
# Show quick plot of significance ()
DESeq2::plotMA(d300.mock.res)

Dilutions 1:1000 dropout

# Get average between the replicates
d1000.mean = cd8.norm %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D1000_mean = rowMeans(select(., d1_1000_a, d1_1000_b))) %>% 
  mutate(M = log2(D1000_mean / Mock_mean),
         A = (log2(D1000_mean) + log2(Mock_mean)) / 2) 

# Plot scatterplot
d1000.scatter = d1000.mean %>% 
  ggplot(aes(x = log10(Mock_mean), y = log10(D1000_mean))) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab("Mock normalized counts (log10)") +
  ylab("D1000 normalized counts (log10)")

# Plot MA plot
d1000.ma = d1000.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 5) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " D1000/Mock)")))

d1000.scatter + d1000.ma

Calculate statistics

# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_1000_a", "d1_1000_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")

# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (D1000 / Mock)
results(ddsMat.subset, alpha = 0.05) %>% 
  summary()
## 
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 2, 0.042%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d1000.mock.res = results(ddsMat.subset, alpha = 0.05)

# View table
d1000.mock.res %>% 
  as.data.frame() %>% 
  rownames_to_column("Id") %>% 
  filter(padj < 0.05 & abs(log2FoldChange) > 1)
## [1] Id             baseMean       log2FoldChange lfcSE          stat          
## [6] pvalue         padj          
## <0 rows> (or 0-length row.names)
# Show quick plot of significance ()
DESeq2::plotMA(d1000.mock.res)

Plot serial dilutions together

# Scatter plot
d10.scatter + d100.scatter + d300.scatter + d1000.scatter 

# MA plot
d10.ma + d100.ma + d300.ma + d1000.ma

Filter low count oligos (visualization)

# Rank
cd8.norm.long %>% 
  group_by(Id) %>% 
  summarise(sum = sum(norm.counts),
            mean = mean(norm.counts),
            median = median(norm.counts),
            sd = sd(norm.counts),
            geomean = gm_mean(norm.counts),
            cv = sd(norm.counts) / mean(norm.counts))  %>% 
  mutate(rank = rank(mean)) %>% 
  ggplot(aes(x = rank, y = log10(mean))) +
  geom_point() +
  theme_minimal()

Plot average abundance

# Get mean counts per oligo
cd8.mean = cd8.norm.long %>% 
  group_by(Id) %>% 
  summarise(sum = sum(norm.counts),
            mean = mean(norm.counts),
            median = median(norm.counts),
            sd = sd(norm.counts),
            geomean = gm_mean(norm.counts),
            cv = sd(norm.counts) / mean(norm.counts)) 

# Get quantiles
quantile(cd8.mean$median , seq(from = 0, to = 0.10, by = 0.005))
##         0%       0.5%         1%       1.5%         2%       2.5%         3% 
##   0.000000   0.318799   1.062006   1.978373   9.955168  27.003683  74.638207 
##       3.5%         4%       4.5%         5%       5.5%         6%       6.5% 
## 142.062296 218.769373 319.850709 396.491177 445.067499 500.165253 551.465274 
##         7%       7.5%         8%       8.5%         9%       9.5%        10% 
## 592.921535 651.214172 689.947011 741.235201 767.365712 801.337192 831.117392
# Histogram with 5% cutoff
cd8.norm.long %>% 
  group_by(Id) %>% 
  summarise(total.count = sum(norm.counts),
            mean.count = mean(norm.counts),
            median = median(norm.counts),
            geomean = gm_mean(norm.counts),
            sd.count = sd(norm.counts)) %>% 
  ggplot(aes(x = log10(median))) +
  geom_histogram(binwidth = 0.25, colour = "black", fill = "white") +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  geom_vline(xintercept = log10(396.491177), color = "red") +
  theme_light(base_size = 11) +
  xlab("Normalized reads per oligo (log10)")

Apply 5% cutoff

# Get ID's within the limits
## Cutoff = 5%
idx <- cd8.norm.long %>% 
  group_by(Id) %>% 
  summarise(sum = sum(norm.counts),
            mean = mean(norm.counts),
            median = median(norm.counts),
            geomean = gm_mean(norm.counts),
            sd = sd(norm.counts) ,
            c.v = sd/mean) %>% 
  filter(median >= 396.491177) %>% 
  pull(Id)

# Filter table
cd8.norm.fil = cd8.norm %>% 
  filter(Id %in% idx)

# Write filter table to disk
write_tsv(cd8.norm, "tables/cd8-counts-norm-filtered.tsv")

# Get dimensions
dim(cd8.norm)
## [1] 4764   15
dim(cd8.norm.fil)
## [1] 4525   15
# Create long dataframe
cd8.norm.fil.long = cd8.norm.fil %>% 
  gather(sample, norm.counts, -Id) 

# Plot boxplot
cd8.norm.fil.long %>% 
  ggplot(aes(x = sample, y = log10(norm.counts + 1))) +
  geom_boxplot() +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("") +
  ylab("Normalized counts (log10)") +
  ggtitle('Median-normalized data w/ filtering') 

# Histogram all
cd8.norm.fil.long %>% 
  ggplot(aes(x = log10(norm.counts))) +
  geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  facet_wrap(sample~.) +
  theme_light(base_size = 11) +
  ylab("Density") +
  xlab("(Filtered) Normalized reads per oligo (log10)")

MA plot (1D3/CDK4.53)

# Get average between the replicates
id3.mean = cd8.norm.fil %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b)),
         ID3_mean = rowMeans(select(., id3_a, id3_b))) %>% 
  mutate(M = log2(ID3_mean / CDK4_mean),
         A = (log2(ID3_mean) + log2(CDK4_mean)) / 2) 

# Plot MA plot
id3.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab(expression(paste("Fold change ", '(', log[2], " 1D3/CDK4)"))) 

### Titration data

# 1:10
d10.mean = cd8.norm.fil %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D10_mean = rowMeans(select(., d1_10_a, d1_10_b))) %>% 
  mutate(M = log2(D10_mean / Mock_mean),
         A = (log2(D10_mean) + log2(Mock_mean)) / 2) 

d10.plot = d10.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 1) +
  xlim(7, 14) +
  xlab("") +
  ylab("")
d10.plot 

# 1:100
d100.mean = cd8.norm.fil %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D100_mean = rowMeans(select(., d1_100_a, d1_100_b))) %>% 
  mutate(M = log2(D100_mean / Mock_mean),
         A = (log2(D100_mean) + log2(Mock_mean)) / 2) 

d100.plot = d100.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 1) +
  xlim(7, 14) +
  xlab("") +
  ylab("")
d100.plot

# 1:300
d300.mean = cd8.norm.fil %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D300_mean = rowMeans(select(., d1_300_a, d1_300_b))) %>% 
  mutate(M = log2(D300_mean / Mock_mean),
         A = (log2(D300_mean) + log2(Mock_mean)) / 2) 

d300.plot = d300.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 1) +
  xlim(7, 14) +
  xlab("") +
  ylab(expression(paste("Fold change ", '(', log[2], " D300/Mock)"))) 
d300.plot

# 1:1000
d1000.mean = cd8.norm.fil %>% 
  mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
         D1000_mean = rowMeans(select(., d1_1000_a, d1_1000_b))) %>% 
  mutate(M = log2(D1000_mean / Mock_mean),
         A = (log2(D1000_mean) + log2(Mock_mean)) / 2) 

d1000.plot = d1000.mean %>% 
  ggplot(aes(x = A, y = M)) +
  geom_point(colour = alpha("grey", 0.5)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(Id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(Id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(Id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(Id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  theme_light(base_size = 11) +
  ylim(-5, 1) +
  xlim(7, 14) +
  xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
  ylab("")
d1000.plot

# Plot as single panel
d10.plot + d100.plot + d300.plot + d1000.plot

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0                forcats_0.5.1              
##  [3] stringr_1.4.0               dplyr_1.0.5                
##  [5] purrr_0.3.4                 readr_1.4.0                
##  [7] tidyr_1.1.3                 tibble_3.1.0               
##  [9] ggplot2_3.3.3               tidyverse_1.3.0            
## [11] readxl_1.3.1                DESeq2_1.30.1              
## [13] SummarizedExperiment_1.20.0 Biobase_2.50.0             
## [15] MatrixGenerics_1.2.1        matrixStats_0.58.0         
## [17] GenomicRanges_1.42.0        GenomeInfoDb_1.26.4        
## [19] IRanges_2.24.1              S4Vectors_0.28.1           
## [21] BiocGenerics_0.36.0         ggsci_2.9                  
## [23] patchwork_1.1.1             knitr_1.31                 
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6           fs_1.5.0               lubridate_1.7.10      
##  [4] bit64_4.0.5            RColorBrewer_1.1-2     httr_1.4.2            
##  [7] tools_4.0.3            backports_1.2.1        bslib_0.2.4           
## [10] utf8_1.2.1             R6_2.5.0               DBI_1.1.1             
## [13] colorspace_2.0-0       withr_2.4.1            tidyselect_1.1.0      
## [16] curl_4.3               bit_4.0.4              compiler_4.0.3        
## [19] cli_2.3.1              rvest_1.0.0            xml2_1.3.2            
## [22] DelayedArray_0.16.3    labeling_0.4.2         sass_0.3.1            
## [25] scales_1.1.1           genefilter_1.72.1      digest_0.6.27         
## [28] foreign_0.8-80         rmarkdown_2.7          rio_0.5.26            
## [31] XVector_0.30.0         pkgconfig_2.0.3        htmltools_0.5.1.1     
## [34] highr_0.8              dbplyr_2.1.0           fastmap_1.1.0         
## [37] rlang_0.4.10           rstudioapi_0.13        RSQLite_2.2.5         
## [40] farver_2.1.0           jquerylib_0.1.3        generics_0.1.0        
## [43] jsonlite_1.7.2         BiocParallel_1.24.1    zip_2.1.1             
## [46] car_3.0-10             RCurl_1.98-1.3         magrittr_2.0.1        
## [49] GenomeInfoDbData_1.2.4 Matrix_1.2-18          Rcpp_1.0.6            
## [52] munsell_0.5.0          fansi_0.4.2            abind_1.4-5           
## [55] lifecycle_1.0.0        stringi_1.5.3          yaml_2.2.1            
## [58] carData_3.0-4          zlibbioc_1.36.0        grid_4.0.3            
## [61] blob_1.2.1             crayon_1.4.1           lattice_0.20-41       
## [64] haven_2.3.1            splines_4.0.3          annotate_1.68.0       
## [67] hms_1.0.0              locfit_1.5-9.4         pillar_1.5.1          
## [70] ggsignif_0.6.1         geneplotter_1.68.0     reprex_1.0.0          
## [73] XML_3.99-0.6           glue_1.4.2             evaluate_0.14         
## [76] data.table_1.14.0      modelr_0.1.8           vctrs_0.3.7           
## [79] cellranger_1.1.0       gtable_0.3.0           assertthat_0.2.1      
## [82] cachem_1.0.4           openxlsx_4.2.3         xfun_0.22             
## [85] xtable_1.8-4           broom_0.7.5            rstatix_0.7.0         
## [88] survival_3.2-7         AnnotationDbi_1.52.0   memoise_2.0.0         
## [91] ellipsis_0.3.1